MYDB=$DB_CENSUS
INPUT_FOLDER=../data/raw_data/census
OUTPUT_FOLDER=../data/data_csv
GEO_YEAR=2017
CENSUS_YEAR=2012

mkdir -p $INPUT_FOLDER

#################################################################################################################################
########## DOWNLOAD INPUT (RAW) FILES MADE PUBLICLY AVAILABLE BY INSEE ##########
CUR_FOLDER=${PWD}
cd $INPUT_FOLDER

### Download file of logements
mkdir -p Logement/2012
cd Logement/2012
curl https://www.insee.fr/fr/statistiques/fichier/1913207/rp2012_logemt_txt.zip --output rp2012_logemt_txt.zip
unzip rp2012_logemt_txt.zip
rm rp2012_logemt_txt.zip
cd ../..


### Download file of professional mobilities
mkdir -p MobiliteProfessionnelle/2014
cd MobiliteProfessionnelle/2014
curl https://www.insee.fr/fr/statistiques/fichier/2866308/rp2014_mobpro_txt.zip --output rp2014_mobpro_txt.zip
unzip rp2014_mobpro_txt.zip
rm rp2014_mobpro_txt.zip
cd ../..

### Download file of formations
mkdir -p Formation/2012
cd Formation/2012
curl https://www.insee.fr/fr/statistiques/fichier/2046643/BTT_TD_FOR2_2012.zip --output BTT_TD_FOR2_2012.zip
unzip BTT_TD_FOR2_2012.zip
rm BTT_TD_FOR2_2012.zip
cd ../..

### Download file of income
mkdir -p Revenu_Pauvrete/2012
cd Revenu_Pauvrete/2012
curl https://www.insee.fr/fr/statistiques/fichier/1895078/revenu-pauvrete-menage-2012.zip --output revenu-pauvrete-menage-2012.zip
unzip revenu-pauvrete-menage-2012.zip
rm revenu-pauvrete-menage-2012.zip
# Open base-cc-filosofi-12.xls in Excel, and save the first tab as base-cc-filosofi-12_COM.csv (remove the first 5 rows)
python3 $CUR_FOLDER/saveExcelAsCSV.py revenu-pauvrete-menage-2012/base-cc-filosofi-12.xls revenu-pauvrete-menage-2012/base-cc-filosofi-12_COM.csv COM 5
cd ../..

cd $CUR_FOLDER
#################################################################################################################################


# Update table all_communes:  I need to convert across the geographies defined in different years
sqlite3 $MYDB "update all_communes set POLE = '49160' where CodeInsee = '44060';"
sqlite3 $MYDB "update all_communes set POLE = 51171 where CodeInsee = '02344';"
sqlite3 $MYDB "update all_communes set POLE = 69286 where CodeInsee = '01132';"
sqlite3 $MYDB "update all_communes set POLE = 91613 where CodeInsee = '78173';"
sqlite3 $MYDB "update all_communes set POLE = 91174 where CodeInsee = '78221';"
sqlite3 $MYDB "update all_communes set POLE = 95355 where CodeInsee = '78025';"
sqlite3 $MYDB "update all_communes set POLE = 95355 where CodeInsee = '78065';"


################################### INCOME ########################################
# Import Revenu_Pauvrete (data from Filosofi file)
INCOME_FILE=$INPUT_FOLDER/Revenu_Pauvrete/2012/revenu-pauvrete-menage-2012/base-cc-filosofi-12_COM.csv
importCSVFile $MYDB $INCOME_FILE income0
sqlite3 $MYDB "create table income as \
    select CODGEO as CodeInsee, LIBGEO as NomCommune, NBMENFISC12 as NumMenagesFiscaux, MED12 as MedianIncome \
    from income0;"
dropTables $MYDB income0

### Put income in the $GEO_YEAR geography
sqlite3 $MYDB "create table income2 as \
    select a.CodeInsee as OrigCodeInsee, b.POLE as CodeInsee, a.NumMenagesFiscaux, a.MedianIncome
    from income a, all_communes b \
    where a.CodeInsee = b.CodeInsee;"
equalJoin $MYDB 1 income3 income2 mycommunes CodeInsee
dropTables $MYDB income income2
# NOTE: There is no income data for the following communes: 09304 26274 29083 29084 55138 76095 76601

# Aggregate at (new) CodeInsee level, by taking weighted average across all (old) CodeInsee, where the weight is calculated proportionate
# to the number of menages fiscaux
calcShares $MYDB 1 weights income3 NumMenagesFiscaux CantonId
sqlite3 $MYDB "update weights set Pctage_NumMenagesFiscaux = NULL where Pctage_NumMenagesFiscaux = 0.0;"
sqlite3 $MYDB "create table income4 as \
    select CantonId, sum(Pctage_NumMenagesFiscaux * MedianIncome) as MedianIncome \
    from weights \
    group by CantonId \
    order by CantonId;"

sqlite3 $MYDB "create table Income_CantonId as \
    select a.CantonId, b.MedianIncome \
    from Scope_CantonId a left join income4 b on a.CantonId = b.CantonId;"

dropTables $MYDB weights income4
dropTables $MYDB income3


################################### HOUSEHOLDS (file logements) ########################################
# Import Logements file from INSEE census
LOGEMENT_FILE=$INPUT_FOLDER/Logement/${CENSUS_YEAR}/FD_LOGEMT_${CENSUS_YEAR}.txt
importCSVFile2 $MYDB $LOGEMENT_FILE logements ";"

# Reduce to relevant columns
selectColumns $MYDB 1 logements2 logements COMMUNE IPONDL INPER INP19M GARL VOIT DIPLM EMPLM INPAM INPOM TACTM CATL
dropTables $MYDB logements

# Keep only residences principales
sqlite3 $MYDB "delete from logements2 where CATL != 1;"

# Get communes into the 2017 geography
sqlite3 $MYDB "create table logements3 as \
    select b.POLE as CodeInsee, IPONDL, INPER, INP19M, GARL, VOIT, DIPLM, EMPLM, INPAM, INPOM, TACTM, \
        0 as HH_has_car, 0 as INP19M_minus_VOIT \
    from logements2 a, all_communes b \
    where a.COMMUNE = b.CodeInsee;"
sqlite3 $MYDB "update logements3 set GARL=0 where GARL!=2;"
sqlite3 $MYDB "update logements3 set GARL=1 where GARL=2;"
sqlite3 $MYDB "update logements3 set VOIT=0 where VOIT!=1 and VOIT!=2 and VOIT!=3;"
sqlite3 $MYDB "update logements3 set HH_has_car=1 where VOIT!=0;"
sqlite3 $MYDB "update logements3 set INP19M_minus_VOIT = INP19M - VOIT;"

# Aggregate things per CodeInsee
sqlite3 $MYDB "create table logements4 as select CodeInsee, \
        sum(IPONDL * INPER) as Population, sum(IPONDL) as NumHHs, \
        sum(IPONDL*VOIT) as NumCars, sum(IPONDL*HH_has_car) as NumHHsWithCar, \
        sum(IPONDL*(1-HH_has_car)) as NumHHsWithoutCar, \
        sum(IPONDL*INP19M) as NumPersons19orOlder, sum(IPONDL*GARL) as NumHHs_with_garage, \
        sum(IPONDL*INP19M_minus_VOIT) as INP19M_minus_VOIT, \
        sum(IPONDL*INPAM) as NumPersonnesActives, sum(IPONDL*INPOM) as NumPersonnesActivesWithEmploi \
    from logements3 group by CodeInsee order by CodeInsee;"

# Merge CodeInsee-level data with table of communes
equalJoin $MYDB 1 logements5 logements4 mycommunes CodeInsee
# NOTE: There is no data for CodeInsee's: 55138 and 76601
dropTables $MYDB logements2 logements3 logements4

# Aggregate data at canton level
sqlite3 $MYDB "create table tmp1 as select CantonId, \
    sum(Population) as Population, sum(NumHHs) as NumHHs, \
    sum(NumCars) as NumCars, sum(NumHHsWithCar) as NumHHsWithCar, \
    sum(NumHHsWithoutCar) as NumHHsWithoutCar, \
    sum(NumPersons19orOlder) as NumPersons19orOlder, sum(NumHHs_with_garage) as NumHHs_with_garage, \
    sum(INP19M_minus_VOIT) as INP19M_minus_VOIT, \
    sum(NumPersonnesActives) as NumPersonnesActives, sum(NumPersonnesActivesWithEmploi) as NumPersonnesActivesWithEmploi, \
        1 - 1.0*sum(NumPersonnesActivesWithEmploi)/sum(NumPersonnesActives) as UnemploymentRate \
    from logements5 group by CantonId;"
    
# Merge with area and calculate density of population
 sqlite3 $MYDB "create table tmp2 as \
    select a.*, 1.0*a.Population/b.Area as PopDensity \
    from tmp1 a, Area_CantonId b \
    where a.CantonId = b.CantonId;"

# Merge with Scope
sqlite3 $MYDB "create table Census_CantonId as \
    select a.CantonId, b.Population, b.NumHHs, b.NumCars, b.NumHHsWithCar, \
        b.NumHHsWithoutCar, b.NumPersons19OrOlder, b.NumHHs_with_garage, \
        b.INP19M_minus_VOIT, b.UnemploymentRate, b.PopDensity \
    from Scope_CantonId a left join tmp2 b on a.CantonId = b.CantonId;"

# Output PopulationPerCanton.csv
sqlite3 $MYDB "create table tmp3 as \
    select a.CantonId, b.RegionId, a.Population \
    from Scope_CantonId b left join Census_CantonId a \
    on a.CantonId = b.CantonId;"
exportCSVFile $MYDB tmp3 $OUTPUT_FOLDER/SpatialMatrices/PopulationPerCanton.csv

# Clean up
dropTables $MYDB tmp1 tmp2 tmp3 logements5



################################### FORMATIONS ########################################
# Population non scolarisée de 15 ans ou plus par sexe, âge et diplôme le plus élevé. Dictionary of values:
#01 : Pas de scolarité
#02 : Aucun diplôme mais scolarité jusqu'en école primaire ou au collège
#03 : Aucun diplôme mais scolarité au delà du collège
#11 : Certificat d'études primaires
#12 : BEPC, brevet élémentaire, brevet des collèges
#13 : Certificat d'aptitudes professionnelles, brevet de compagnon
#14 : Brevet d'études professionnelles
#15 : Baccalauréat général, brevet supérieur
#16 : Bac technologique ou professionnel, brevet professionnel ou de technicien, BEC, BEI, BEH, capacité en droit
#17 : Diplôme universitaire de 1er cycle, BTS, DUT, diplôme des professions sociales ou de santé, d'infirmier(ère)
#18 : Diplôme universitaire de 2ème ou 3ème cycle (y compris médecine, pharmacie, dentaire), diplôme d'ingénieur, d'une grande école, doctorat, etc.
FORMATION_FILE=$INPUT_FOLDER/Formation/${CENSUS_YEAR}/BTT_TD_FOR2_2012.txt
FORMATION_FILE2=$INPUT_FOLDER/Formation/${CENSUS_YEAR}/BTT_TD_FOR2_2012_2.txt
python3 replaceCommasByPeriods.py $FORMATION_FILE $FORMATION_FILE2 # Original French file uses comma as a decimal point, which causes an issue
importCSVFile2 $MYDB $FORMATION_FILE2 formations ";"
rm $FORMATION_FILE2

# Remove data about arrondissements municipaux
sqlite3 $MYDB "delete from formations where NIVGEO != 'COM';"

# Get communes into the 2017 geography
sqlite3 $MYDB "create table formations2 as \
    select b.POLE as CodeInsee, a.AGEQ65, a.DIPL, a.SEXE, a.NB \
    from formations a, all_communes b \
    where a.CODGEO = b.CodeInsee"
equalJoin $MYDB 1 formations3 formations2 mycommunes CodeInsee
# NOTE: The following two communes have no data: 55138 and 76601  (population of 149 and 252 inhabitants).
dropTables $MYDB formations formations2

# Here I delete population who is strictly younger than 25 years old (because they may not be able to drive, and because
# "population non scolarisée" introduces some selection bias due to scolarisation).
sqlite3 $MYDB "delete from formations3 where AGEQ65 = '015' or AGEQ65 = '020';"

# Aggregate across ages and genders (and communes that were regrouped in the same CodeInsee)
calcAggregate $MYDB 1 formations4 formations3 SUM NB CodeInsee CantonId DEP RegionId DIPL

# Put diploma levels as different columns
sqlite3 $MYDB "create table formations5 as \
    select CodeInsee, CantonId, DEP, RegionId, DIPL, SUM_NB, \
        case when DIPL = '01' then SUM_NB else 0 end as NB_DIPL_01, \
        case when DIPL = '02' then SUM_NB else 0 end as NB_DIPL_02, \
        case when DIPL = '03' then SUM_NB else 0 end as NB_DIPL_03, \
        case when DIPL = '11' then SUM_NB else 0 end as NB_DIPL_11, \
        case when DIPL = '12' then SUM_NB else 0 end as NB_DIPL_12, \
        case when DIPL = '13' then SUM_NB else 0 end as NB_DIPL_13, \
        case when DIPL = '14' then SUM_NB else 0 end as NB_DIPL_14, \
        case when DIPL = '15' then SUM_NB else 0 end as NB_DIPL_15, \
        case when DIPL = '16' then SUM_NB else 0 end as NB_DIPL_16, \
        case when DIPL = '17' then SUM_NB else 0 end as NB_DIPL_17, \
        case when DIPL = '18' then SUM_NB else 0 end as NB_DIPL_18 \
    from formations4;"
dropTables $MYDB formations3 formations4

sqlite3 $MYDB "create table tmp2 as \
    select CantonId, sum(SUM_NB) as SUM_NB, \
        SUM(NB_DIPL_01) as NB_DIPL_01, \
        SUM(NB_DIPL_02) as NB_DIPL_02, \
        SUM(NB_DIPL_03) as NB_DIPL_03, \
        SUM(NB_DIPL_11) as NB_DIPL_11, \
        SUM(NB_DIPL_12) as NB_DIPL_12, \
        SUM(NB_DIPL_13) as NB_DIPL_13, \
        SUM(NB_DIPL_14) as NB_DIPL_14, \
        SUM(NB_DIPL_15) as NB_DIPL_15, \
        SUM(NB_DIPL_16) as NB_DIPL_16, \
        SUM(NB_DIPL_17) as NB_DIPL_17, \
        SUM(NB_DIPL_18) as NB_DIPL_18 \
    from formations5 \
    group by CantonId;"

sqlite3 $MYDB "create table tmp3 as \
    select CantonId, \
        1.0*NB_DIPL_01/SUM_NB as Fraction_DIPL_01, 
        1.0*NB_DIPL_02/SUM_NB as Fraction_DIPL_02, 
        1.0*NB_DIPL_03/SUM_NB as Fraction_DIPL_03, 
        1.0*NB_DIPL_11/SUM_NB as Fraction_DIPL_11, 
        1.0*NB_DIPL_12/SUM_NB as Fraction_DIPL_12, 
        1.0*NB_DIPL_13/SUM_NB as Fraction_DIPL_13, 
        1.0*NB_DIPL_14/SUM_NB as Fraction_DIPL_14, 
        1.0*NB_DIPL_15/SUM_NB as Fraction_DIPL_15, 
        1.0*NB_DIPL_16/SUM_NB as Fraction_DIPL_16, 
        1.0*NB_DIPL_17/SUM_NB as Fraction_DIPL_17, 
        1.0*NB_DIPL_18/SUM_NB as Fraction_DIPL_18 \
    from tmp2;"

sqlite3 $MYDB "create table Diplomas_CantonId as \
    select a.CantonId, \
        b.Fraction_DIPL_01, \
        b.Fraction_DIPL_02, \
        b.Fraction_DIPL_03, \
        b.Fraction_DIPL_11, \
        b.Fraction_DIPL_12, \
        b.Fraction_DIPL_13, \
        b.Fraction_DIPL_14, \
        b.Fraction_DIPL_15, \
        b.Fraction_DIPL_16, \
        b.Fraction_DIPL_17, \
        b.Fraction_DIPL_18 \
    from Scope_CantonId a left join tmp3 b on a.CantonId = b.CantonId;"
dropTables $MYDB tmp2 tmp3
dropTables $MYDB formations5


################################### MOBILITES PROFESSIONNELLES ########################################
MOBI_PRO_FILE=$INPUT_FOLDER/MobiliteProfessionnelle/2014/FD_MOBPRO_2014.txt
importCSVFile2 $MYDB $MOBI_PRO_FILE mobis0 ";"

#COMMUNE=departement et commune du lieu de residence  (75056 for Paris, 69123 for Lyon, 13055 for Marseille)
#DCLT=departement et commune du lieu de residence (specific to arrondissement for Paris, Lyon and Marseille)
# 	99999 means that the individu travaille a l'etranger
#REGION=region du lieu de residence
#REGLT=region du lieu de travail

# Put Paris, Lyon and Marseille into 75056, 13055 and 69123
sqlite3 $MYDB "update mobis0 set DCLT='75056' where DCLT like '751%';"
sqlite3 $MYDB "update mobis0 set DCLT='13055' where DCLT like '132%';"
sqlite3 $MYDB "update mobis0 set DCLT='69123' where DCLT like '6938%';"

# Count the number of individus for each pair (commune de residence / commune de travail)
sqlite3 $MYDB "create table mobis as \
    select COMMUNE as CommuneResidence, DCLT as CommuneTravail, sum(IPONDI) as NumIndividus \
    from mobis0 \
    group by CommuneResidence, CommuneTravail \
    order by CommuneResidence,CommuneTravail;"

dropTables $MYDB mobis0


sqlite3 $MYDB "create table mobis2 as \
    select b.POLE as CommuneResidence,  a.CommuneTravail, a.NumIndividus \
    from mobis a left join all_communes b on a.CommuneResidence = b.CodeInsee;"

# The equal join below ignores those who work outside of France and / or in communes with weird codes (e.g. starting with 98...)
#sqlite3 $MYDB "delete from mobis2 where CommuneTravail = 99999;"
sqlite3 $MYDB "create table mobis3 as \
    select a.CommuneResidence, b.POLE as CommuneTravail, a.NumIndividus \
    from mobis2 a, all_communes b \
    where a.CommuneTravail = b.CodeInsee;"

# Treat the case of commune 51664 => 51171 (commune deleguee) and commune 55189 (no inhabitant... although people could work there)
sqlite3 $MYDB "update mobis3 set CommuneResidence = '51171' where CommuneResidence = '51664';"
sqlite3 $MYDB "update mobis3 set CommuneTravail = '51171' where CommuneTravail = '51664';"
sqlite3 $MYDB "delete from mobis3 where CommuneTravail = '55189';"

# Recompute aggregates (some communes got merged)
sqlite3 $MYDB "create table mobis4 as \
    select CommuneResidence, CommuneTravail, sum(NumIndividus) as NumIndividus \
    from mobis3 \
    group by CommuneResidence, CommuneTravail;"

# Merge with mycommunes to obtain CantonId, ARR_ID, DEP and RegionId
sqlite3 $MYDB "create table mobis5 as \
    select b.CodeInsee as CodeInsee_Residence, b.CantonId as CantonId_Residence, \
        b.DEP as DEP_Residence, b.RegionId as RegionId_Residence, a.CommuneTravail, a.NumIndividus \
    from mobis4 a, mycommunes b \
    where a.CommuneResidence = b.CodeInsee;"

sqlite3 $MYDB "create table mobis6 as \
    select a.CodeInsee_Residence, a.CantonId_Residence,  a.DEP_Residence, a.RegionId_Residence, \
        b.CodeInsee as CodeInsee_Travail, b.CantonId as CantonId_Travail, \
        b.DEP as DEP_Travail, b.RegionId as RegionId_Travail, \
        a.NumIndividus \
    from mobis5 a, mycommunes b \
    where a.CommuneTravail = b.CodeInsee;"

# Clean up
dropTables $MYDB mobis mobis2 mobis3 mobis4 mobis5
renameTable $MYDB mobis6 mobis

##############################
### MAKE SPATIAL MATRIX OF COMMUTING FROM EACH CANTON TO EACH CANTON
# Aggregate mobilities professionnelles at that geo-level
calcAggregate $MYDB 1 tmp1 mobis SUM NumIndividus CantonId_Residence CantonId_Travail

# Export these tables into a CSV file
exportCSVFile $MYDB tmp1 $OUTPUT_FOLDER/SpatialMatrices/MobilitePro.csv

# Clean up
dropTables $MYDB tmp1

###################
### MAKE NUMBER OF INCOMING / OUTGOING COMMUTERS FOR EACH CANTON
sqlite3 $MYDB "create table WorkersFromCantonId as \
    select CantonId_Residence as CantonId, sum(NumIndividus) as NumWorkersFromLocation \
    from mobis \
    group by CantonId_Residence;"

sqlite3 $MYDB "create table WorkersToCantonId as \
    select CantonId_Travail as CantonId, sum(NumIndividus) as NumWorkersToLocation \
    from mobis \
    group by CantonId_Travail;"

sqlite3 $MYDB "create table CommutersFromCantonId as \
    select CantonId_Residence as CantonId, sum(NumIndividus) as NumCommutersFromLocation \
    from mobis \
    where CantonId_Residence!= CantonId_Travail \
    group by CantonId_Residence;"

sqlite3 $MYDB "create table CommutersToCantonId as \
    select CantonId_Travail as CantonId, sum(NumIndividus) as NumCommutersToLocation \
    from mobis \
    where CantonId_Residence!= CantonId_Travail \
    group by CantonId_Travail;"


leftJoin $MYDB 1 a1 Scope_CantonId WorkersFromCantonId CantonId
leftJoin $MYDB 1 a2 a1 WorkersToCantonId CantonId
leftJoin $MYDB 1 a3 a2 CommutersFromCantonId CantonId
leftJoin $MYDB 1 a4 a3 CommutersToCantonId CantonId
selectColumns $MYDB 1 Commuting_CantonId a4 CantonId NumWorkersFromLocation NumWorkersToLocation NumCommutersFromLocation NumCommutersToLocation

dropTables $MYDB a1 a2 a3 a4
dropTables $MYDB WorkersFromCantonId WorkersToCantonId CommutersFromCantonId CommutersToCantonId

sqlite3 $MYDB "update Commuting_CantonId set NumWorkersFromLocation = 0   where NumWorkersFromLocation is null;"
sqlite3 $MYDB "update Commuting_CantonId set NumWorkersToLocation = 0     where NumWorkersToLocation is null;"
sqlite3 $MYDB "update Commuting_CantonId set NumCommutersFromLocation = 0 where NumCommutersFromLocation is null;"
sqlite3 $MYDB "update Commuting_CantonId set NumCommutersToLocation = 0   where NumCommutersToLocation is null;"

### Clean up
dropTables $MYDB mobis

###############################################################################################

### Clean up
dropTables $MYDB Scope_CantonId mycommunes all_communes
sqlite3 $MYDB "VACUUM;"
